
******************************************
* PROCESSING THE SPATIAL JOIN FILES      *
*            ------------                *
* This do-file processes files from the  *
* spatial join step. The objective is to *
* compare the assignations resulting from*
* each of the three spatial join option :* 
* within, border and center and chose    *
* the optimal assignation for each plant.*
* The procedure is performed for each    *
* polydon type : parcel, building and    *
* height.                          	     *  
******************************************


// NOTES:

// We always prefer the within method (within) > centroid method (cent) > polygon method (poly)
// Assignments are evaluated in that order to create the variables

// the _d means 'declared'
// the _c means 'computed' (from the polygons in the shapefiles) ; this is never missing for cent and poly (but can be missing for within)


clear all
set more off

// Working directories
cd "$workpath"

//global outpath_OLD = "$outpath" + "_OLD"

// Locals we need for looping across years and IO directories
local years "2001 2003 2005 2007 2009 2011 2013 2017"

scalar alpha = 75  																								  

//Threshold distance to consider assignations
scalar beta = 10


// -----------------------------------------------
// STEP 1 : Processing parcels polygons
// -----------------------------------------------

use "$privatedata/scotts_p.dta", clear
//use "$datapath/private/scotts_p.dta", clear

scalar alpha = 75  																								  //Threshold distance to consider assignations
scalar beta = 10																								  //Max number of neighbors for a polygon not to be considered as corridor or road

// 1.A.  Replacing 0 values by missing
replace witharea_d = . if witharea_d == 0
replace centarea_d = . if centarea_d == 0
replace polyarea_d = . if polyarea_d == 0

// 1.B. Choosing between the three options of spatial join : within, poly, and centroid
//      For calculated values, i.e., those computed from the polygons using GIS tools

//gen corridors = .
//replace corridors = (accuracy != "ROOFTOP" & withneighb != .) | (withneighb > beta & withneighb != .)             // Not rooftop with within assignation OR on a polygon withnmore than 10 neighbors
gen corridors=(accuracy!="ROOFTOP" & withneighb!=.) | (withneighb>beta & withneighb!=.)   				/*Identifying corridors : not a rooftop precision with within assignation OR within assignation on a polygon withnmore than 10 neighbors*/
replace corridors=. if witharea_c==.

// 1.C.
gen P_area_c = witharea_c if withneighb != .																	  // If within assignation, take that as the parcel area
replace P_area_c = . if corridors == 1									 							              //Removing areas' assignations from corridors

replace P_area_c = centarea_c if P_area_c == . & (centdist < polydist & centneighb <= beta & centdist <= alpha)   //taking the surface from the method yielding the lowest between the polygon and plant
replace P_area_c = polyarea_c if P_area_c == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)  //taking the surface from the method yielding the lowest between the polygon and plant

replace P_area_c = centarea_c if P_area_c == . & (centneighb <= beta & centdist <= alpha)						  // Assigning measures for remaining missings
replace P_area_c = polyarea_c if P_area_c == . & (polyneighb <= beta & polydist <= alpha)						  // Assigning measures for remaining missings

// 1.D. For declared values : Those found in the datasets
gen P_area_d = witharea_d if withneighb != .
replace P_area_d = . if corridors == 1									 									      //Removing areas' assignations from corridors

replace P_area_d = centarea_d if P_area_d == . & (centdist < polydist & centneighb <= beta & centdist <= alpha)   //Take the surface from the method yielding the lowest between the polygon and plant
replace P_area_d = polyarea_d if P_area_d == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)  //Take the surface from the method yielding the lowest between the polygon and plant

replace P_area_d = centarea_d if P_area_d == . & (centneighb <= beta & centdist <= alpha)						  //Assign measures for remaining missings
replace P_area_d = polyarea_d if P_area_d == . & (polyneighb <= beta & polydist <= alpha)						  //Assign measures for remaining missings
replace P_area_d = -P_area_d if P_area_d < 0		 															  //Small number of likely errors for which the surface has an incorrect minus sign

// 1.E. Create the parcel identifier coherent with the final assignations
gen idparcel = withidsup if withneighb != .																		  // If within assignation, take that as the parcel id
replace idparcel = "" if corridors == 1																		      //Remove idsup' assignations from corridors

replace idparcel = centidsup if idparcel == "" & (centdist <polydist & centneighb <= beta & centdist <= alpha)	  //Take the idsup from the method yielding the lowest between the polygon and plant
replace idparcel = polyidsup if idparcel == "" & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)  //Take the idsup from the method yielding the lowest between the polygon and plant

replace idparcel = centidsup if idparcel == "" & (centneighb <= beta & centdist <= alpha)	 					  //Assign idsup for remaining missings
replace idparcel = polyidsup if idparcel == "" & (polyneighb <= beta & polydist <= alpha)						  //Assign idsup for remaining missings

// 1.F. Create the neighbors coherent with the final assignations
gen neighbors = withneighb if withneighb != .																	  // If within assignation, take that as the neighbor counts
replace neighbors = . if corridors == 1	 																	      //Remove neighbors assignations from corridors

replace neighbors = centneighb if neighbors == . & (centdist < polydist & centneighb <= beta & centdist <= alpha) //Take the neighb from the method yielding the lowest between the polygon and plant
replace neighbors = polyneighb if neighbors == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha) //Take the neighb from the method yielding the lowest between the polygon and plant

replace neighbors = centneighb if neighbors == . & (centneighb <= beta & centdist <= alpha)						  //Assign neighb for remaining missings
replace neighbors = polyneighb if neighbors == . & (polyneighb <= beta & polydist <= alpha)						  //Assign neighb for remaining missings

// 1.G. Create the distance coherent with the final assignations*
gen dist = 0 if withneighb != .																				      // If within assignation, take that as the distance
replace dist = . if corridors == 1																			      //Remove dist assignations from corridors

replace dist = centdist if dist == . & (centdist < polydist & centneighb <= beta & centdist <= alpha)			  //Take the dist from the method yielding the lowest between the polygon and plant
replace dist = polydist if dist == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)			  //Take the dist from the method yielding the lowest between the polygon and plant

replace dist = centdist if dist == . & (centneighb <= beta & centdist <= alpha)									  //Assign dist for remaining missings
replace dist = polydist if dist == . & (polyneighb <= beta & polydist <= alpha)									  //Assign dist for remaining missings

// 1.H. Choose one assignation for plants with more than 1 assignation (frontier of two or more polygons)  
egen disttest = min(dist), by(adressid)																	          //Generate the min dist between all the assignation processes and polygons for each address
gen dup = dist - disttest																				          //Identify adressid that has more than one assignation
drop if dup > 0   																						          //we keep the assignation for which dup=0 and we drop the others
duplicates drop adressid, force																			          //We drop duplicates if any still exist
order _all, alphabetic																					          //Order variables to avoid confusion during the appending processes

drop dup* accuracy* corridors disttest																	          //We drop useless variables

// 1.I. Correct some variable names and generate some other useful variables
replace state_id = subinstr(state_id, "_", "", .)															      //Correct the names of polygons from with the area measures come from
gen prov_statep = substr(state_id,1,2)																	          //Generate a variable indicating the province to which the parcels polygon is related to
replace prov_statep = upper(prov_statep)																	      //Transform province names in uppercase
replace prov_statep = "MB" if prov_statep == "MA"															      //Hamonize the short name of Manitoba with what already exists in the database
replace prov_statep = "SK" if prov_statep == "SA"															      //Hamonize the short name of Saskatchewan with what already exists in the database

// 1.J. Generate a variable that tells us which assignments give us the final result
gen assignedby = "border" if idparcel == polyidsup															
replace assignedby = "centroid" if idparcel == centidsup
replace assignedby = "within" if idparcel == withidsup
replace assignedby = "centroid-border" if idparcel == centidsup & idparcel == polyidsup &idparcel != withidsup
replace assignedby = "within-border" if idparcel == withidsup & idparcel == polyidsup &idparcel != centidsup
replace assignedby = "within-center" if idparcel == withidsup & idparcel == centidsup &idparcel != polyidsup
replace assignedby = "centroid-border-within" if idparcel == centidsup & idparcel == polyidsup & idparcel == withidsup

// 1.K Rename variables, do final cleanups
ren dist distp
ren idparcel idparcelp
ren polygon polygonp
ren state_id state_idp
ren assignedby assignedbyp

tostring idparcel*, replace
replace idparcelp = subinstr(idparcelp, " ", "", .)
replace idparcelp = subinstr(idparcelp, " ", "", .)
replace idparcelp = subinstr(idparcelp, " ", "", .)
replace idparcelp = subinstr(idparcelp, ".", "", .)

replace P_area_c=. if P_area_c==0
replace P_area_d=. if P_area_d==0
save "$outpath/scotts_parcels.dta", replace



// -----------------------------------------------
// STEP 2 : Processing building polygons
// -----------------------------------------------

use "$privatedata/scotts_b.dta", clear

// 2.A. Replacing zeros by missing
// Note: There are no declared values at all in the shapefiles, so these are all missing/zero initially
replace witharea_d = . if witharea_d == 0
replace centarea_d = . if centarea_d == 0
replace polyarea_d = . if polyarea_d == 0


// 2.B. Choosing between the three options of spatial join : within, poly, and centroid
gen corridors = (accuracy != "ROOFTOP" & withneighb != .) | (withneighb > beta & withneighb != .)     	        // Not rooftop with within assignation OR on a polygon withnmore than 10 neighbors
replace corridors = . if witharea_c == .																        //Remove areas' assignations from corridors

gen B_area_c = witharea_c if withneighb != .															        //Generate the raw area variable : Giving premium to within method
replace B_area_c = . if corridors == 1									 									    //Removing areas' assignations from corridors
replace B_area_c = centarea_c if (B_area_c == . & centdist < polydist & centneighb <= beta & centdist <= alpha) //Take the surface from the method yielding the lowest between the polygon and plant
replace B_area_c = polyarea_c if (B_area_c == . & centdist >= polydist & polyneighb <= beta & polydist <= alpha) //Take the surface from the method yielding the lowest between the polygon and plant
replace B_area_c = centarea_c if (B_area_c == . & centneighb <= beta & centdist <= alpha)						//Assign measures for remaining missings
replace B_area_c = polyarea_c if (B_area_c == . & polyneighb <= beta & polydist <= alpha)						//Assign measures for remaining missings

// 2.C. For declared values : Those found in the datasets
//
// Note: Previous version (B_area_d = centarea_c and B_area_d = polyarea_c in the last two instructions,
// as well as < beta strict)
//
gen B_area_d = witharea_d if withneighb != .																    //Generate the raw area variable : Giving premium to within method
replace B_area_d = . if corridors == 1									 									    //Remove areas' assignations from corridors
replace B_area_d = centarea_d if (B_area_d == . & centdist < polydist & centneighb <= beta & centdist <= alpha) //Take the surface from the method yielding the lowest between the polygon and plant
replace B_area_d = polyarea_d if (B_area_d == . & centdist >=polydist & polyneighb <= beta & polydist <= alpha) //Take the surface from the method yielding the lowest between the polygon and plant
replace B_area_d = centarea_d if (B_area_d == . & centneighb <= beta & centdist <= alpha)						//Assign measures for remaining missings
replace B_area_d = polyarea_d if (B_area_d == . & polyneighb <= beta & polydist <= alpha)						//Assign measures for remaining missings
replace B_area_d = -B_area_d if B_area_d < 0																    //Small number of likely errors for which the surface has an incorrect minus sign

// 2.D. Create the parcel identifier that is coherent with the final assignations
gen idparcel = withidsup if withneighb != .																	    //Generate the identifier variable : Giving premium to within method
replace idparcel = "" if corridors == 1																		    //Remove idsup' assignations from corridors
replace idparcel = centidsup if (idparcel == "" & centdist < polydist & centneighb <= beta & centdist <= alpha)	//Take the idsup from the method yielding the lowest between the polygon and plant
replace idparcel = polyidsup if (idparcel == "" & centdist >= polydist & polyneighb <= beta & polydist <= alpha) //Take the idsup from the method yielding the lowest between the polygon and plant
replace idparcel = centidsup if (idparcel == "" & centneighb <= beta & centdist <= alpha) 						//Assign idsup for remaining missings
replace idparcel = polyidsup if (idparcel == "" & polyneighb <= beta & polydist <= alpha)					    //Assigning idsup for remaining missings

// 2.E. Create the neighbors coherent with the final assignations
gen neighbors = withneighb if withneighb != .																    //Generate the neighbor variable : Giving premium to within method
replace neighbors = . if corridors == 1	 																	    //Remove neighbors assignations from corridors
replace neighbors = centneighb if (neighbors == . & centdist < polydist & centneighb <= beta & centdist <= alpha)  //Take the neighb from the method yielding the lowest between the polygon and plant*/
replace neighbors = polyneighb if (neighbors == . & centdist >= polydist & polyneighb <= beta & polydist <= alpha) //Take the neighb from the method yielding the lowest between the polygon and plant*/
replace neighbors = centneighb if (neighbors == . & centneighb <= beta & centdist <= alpha)						//Assign neighb for remaining missings*/
replace neighbors = polyneighb if (neighbors == . & polyneighb <= beta & polydist <= alpha)						//Assign neighb for remaining missings*/

// 2.F. Create the distance coherent with the final assignations
gen dist = 0 if withneighb != .																				    //Generate the distance variable : Giving premium to within method
replace dist = . if corridors == 1																			    //Remove dist assignations from corridors
replace dist = centdist if (dist == . & centdist < polydist & centneighb <= beta & centdist <= alpha)				//Take the dist from the method yielding the lowest between the polygon and plant
replace dist = polydist if (dist == . & centdist >= polydist & polyneighb <= beta & polydist <= alpha)		    //Take the dist from the method yielding the lowest between the polygon and plant
replace dist = centdist if (dist == . & centneighb <= beta & centdist <= alpha)									//Assign dist for remaining missings
replace dist = polydist if (dist == . & polyneighb <= beta & polydist <= alpha)									//Assign dist for remaining missings

// 2.G. Choose one assignation for plants with more than 1 assignation due to their location at the frontier of two or more polygons  
egen disttest = min(dist), by(adressid)																	        //Generate the min dist between all the assignation processes and polygons for each addresslocation
gen dup = dist - disttest																					    //Identify adressid that has more than one assignation
drop if dup > 0   																						        //we keep the assignation for which dup=0 and we drop the others
duplicates drop adressid, force																			        //We drop duplicates if any still exist
order _all, alphabetic																					        //Order variables to avoid confusion during the appending processes
drop dup* accuracy* corridors disttest																	        //We drop useless variables

// 2.H. Correct some variables names and generating some other useful variables
replace state_id = subinstr(state_id, "_", "", .)
gen prov_stateb = substr(state_id, 1, 2)
replace prov_stateb = "AB" if prov_stateb == "al"
replace prov_stateb = "BC" if prov_stateb == "br"
replace prov_stateb = "MB" if prov_stateb == "ma"
replace prov_stateb = "NS" if prov_stateb == "no"
replace prov_stateb = "PE" if prov_stateb == "pr"
replace prov_stateb = "QC" if prov_stateb == "qu"
replace prov_stateb = "SK" if prov_stateb == "sa"
replace prov_stateb = "YT" if prov_stateb == "yu"
replace prov_stateb = "NB" if prov_stateb == "ne"
replace prov_stateb = upper(prov_stateb)

// 2.I. Generate a variable that tells us which assignments give us the final result
gen assignedby = "border" if idparcel == polyidsup
replace assignedby = "centroid" if idparcel == centidsup
replace assignedby = "within" if idparcel == withidsup
replace assignedby = "within-border" if (idparcel == withidsup & idparcel == polyidsup & idparcel != centidsup)
replace assignedby = "within-centroid" if (idparcel == withidsup & idparcel == centidsup & idparcel != polyidsup)
replace assignedby = "centroid-border" if (idparcel == centidsup & idparcel == polyidsup & idparcel != withidsup)
replace assignedby = "centroid-border-within" if (idparcel == centidsup & idparcel == polyidsup & idparcel == withidsup)

// 2.J. Rename some variables
ren dist distb
ren idparcel idparcelb
ren polygon polygonb
ren state_id state_idb
ren assignedby assignedbyb

replace state_idb = "alberta" if substr(state_idb, 1, 7) == "alberta"
replace state_idb = "britishcolumbia" if substr(state_idb, 1, 7) == "british"
replace state_idb = "manitoba" if substr(state_idb, 1, 7) == "manitob"
replace state_idb = "newbrunswick" if substr(state_idb, 1, 7) == "newbrun"
replace state_idb = "ontario" if substr(state_idb, 1, 7) == "ontario"
replace state_idb = "quebec" if substr(state_idb, 1, 6) == "quebec"
replace state_idb = "saskatchewan" if substr(state_idb, 1, 7) == "saskatc"

replace idparcelb = subinstr(idparcelb, " ", "", .)
replace idparcelb = subinstr(idparcelb, " ", "", .)
replace idparcelb = subinstr(idparcelb, " ", "", .)
replace idparcelb = subinstr(idparcelb, ".", "", .)

replace B_area_d = . if B_area_d == 0
replace B_area_c = . if B_area_c == 0
save "$outpath/scotts_buildings.dta", replace


// --------------------------------------------------------
// STEP 3 : Process  floorspace : building with heights
// --------------------------------------------------------

use "$privatedata/scotts_h.dta", clear

* a. Replacing 0 values by missing
replace witharea_d=. if witharea_d==0
replace centarea_d=. if centarea_d==0
replace polyarea_d=. if polyarea_d==0

* b. Choosing between the three options of spatial join : within, poly and Centro
	   *For calculated values : Those computed from the polygons using GIS tools

gen corridors = .
replace corridors = (accuracy != "ROOFTOP" & withneighb != .) | (withneighb > beta & withneighb != .)      /*Identifying corridors : not a rooftop precision with within assignation OR within assignation on a polygon withnmore than 10 neighbors*/

// OLD
//gen corridors = (accuracy != "ROOFTOP" & withneighb != .) | (withneighb > beta & withneighb != .)      /*Identifying corridors : not a rooftop precision with within assignation OR within assignation on a polygon withnmore than 10 neighbors*/

//replace corridors=. if witharea_c==.																	/*Removing areas' assignations from corridors*/
// OLD

// Step 1
gen FB_area_c = witharea_c if withneighb != .	// If within assignation, take that as the parcel area
/*Generating the raw area variable : Giving premium to within method*/
replace FB_area_c = . if corridors == 1									 							              //Removing areas' assignations from corridors

replace FB_area_c = centarea_c if FB_area_c == . & (centdist < polydist & centneighb <= beta & centdist <= alpha)   //taking the surface from the method yielding the lowest between the polygon and plant
replace FB_area_c = polyarea_c if FB_area_c == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)  //taking the surface from the method yielding the lowest between the polygon and plant

replace FB_area_c = centarea_c if FB_area_c == . & (centneighb <= beta & centdist <= alpha)						  // Assigning measures for remaining missings
replace FB_area_c = polyarea_c if FB_area_c == . & (polyneighb <= beta & polydist <= alpha)						  // Assigning measures for remaining missings*/

// Step 2
*For declared values : Those found in the datasets
gen FB_area_d = witharea_d if withneighb != .
/*Generating the raw area variable : Giving premium to within method*/
replace FB_area_d = . if corridors == 1									 									/*Removing areas' assignations from corridors*/

replace FB_area_d = centarea_d if FB_area_d == . & (centdist < polydist & centneighb <= beta & centdist <= alpha)		/*taking the surface from the method yielding the lowest between the polygon and plant*/
replace FB_area_d = polyarea_d if FB_area_d == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)		/*taking the surface from the method yielding the lowest between the polygon and plant*/

replace FB_area_d = centarea_d if FB_area_d == . & (centneighb <= beta & centdist <= alpha)							/*Assigning measures for remaining missings*/				
replace FB_area_d = polyarea_d if FB_area_d == . & (polyneighb <= beta & polydist <= alpha)							/*Assigning measures for remaining missings*/
replace FB_area_d = -FB_area_d if FB_area_d < 0		// small number of likely errors for which the surface has an incorrect minus sign


// Step 3
*Creating the parcel identifier coherent with the final assignations*
gen idparcel = withidsup if withneighb != .
/*Generating the identifier variable : Giving premium to within method*/
replace idparcel = "" if corridors == 1																		/*Removing idsup' assignations from corridors*/

replace idparcel = centidsup if idparcel == "" & (centdist <polydist & centneighb <= beta & centdist <= alpha)	 	/*taking the idsup from the method yielding the lowest between the polygon and plant*/
replace idparcel = polyidsup if idparcel == "" & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)		/*taking the idsup from the method yielding the lowest between the polygon and plant*/

replace idparcel = centidsup if idparcel == "" & (centneighb <= beta & centdist <= alpha)	 						/*Assigning idsup for remaining missings*/
replace idparcel = polyidsup if idparcel == "" & (polyneighb <= beta & polydist <= alpha)							/*Assigning idsup for remaining missings*/


// Step 4
*Creating the neighbors coherent with the final assignations*
gen neighbors = withneighb if withneighb != .
/*Generating the neighbor variable : Giving premium to within method*/
replace neighbors = . if corridors == 1	 																	/*Removing neighbors assignations from corridors*/

replace neighbors = centneighb if neighbors == . & (centdist < polydist & centneighb <= beta & centdist <= alpha)	/*taking the neighb from the method yielding the lowest between the polygon and plant*/
replace neighbors = polyneighb if neighbors == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)	/*taking the neighb from the method yielding the lowest between the polygon and plant*/

replace neighbors = centneighb if neighbors == . & (centneighb <= beta & centdist <= alpha)						/*Assigning neighb for remaining missings*/
replace neighbors = polyneighb if neighbors == . & (polyneighb <= beta & polydist <= alpha)						/*Assigning neighb for remaining missings*/

// Step 5
*Creating the distance coherent with the final assignations*
gen dist = 0 if withneighb != .
/*Generating the distance variable : Giving premium to within method*/
replace dist = . if corridors == 1																			/*Removing dist assignations from corridors*/

replace dist = centdist if dist == . & (centdist < polydist & centneighb <= beta & centdist <= alpha)				/*taking the dist from the method yielding the lowest between the polygon and plant*/
replace dist = polydist if dist == . & (centdist >= polydist & polyneighb <= beta & polydist <= alpha)				/*taking the dist from the method yielding the lowest between the polygon and plant*/

replace dist = centdist if dist == . & (centneighb <= beta & centdist <= alpha)									/*Assigning dist for remaining missings*/
replace dist = polydist if dist == . & (polyneighb <= beta & polydist <= alpha)									/*Assigning dist for remaining missings*/


// Step 6
*Creating the height coherent with the adjustments of assignations*
gen height=withheight if withheight!=.																	/*Generating the height variable :Giving premium to within method*/
/*Generating the neighbor variable : Giving premium to within method*/
replace height = . if corridors == 1	 																	/*Removing neighbors assignations from corridors*/

replace height = centheight if height == . & (centdist < polydist & centheight <= beta & centdist <= alpha)	/*taking the neighb from the method yielding the lowest between the polygon and plant*/
replace height = polyheight if height == . & (centdist >= polydist & polyheight <= beta & polydist <= alpha)	/*taking the neighb from the method yielding the lowest between the polygon and plant*/

replace height = centheight if height == . & (centheight <= beta & centdist <= alpha)						/*Assigning neighb for remaining missings*/
replace height = polyheight if height == . & (polyheight <= beta & polydist <= alpha)						/*Assigning neighb for remaining missings*/

// Step 7
*Choosing one assignation for plants with more than 1 assignation due to their location at the frontier of two or more polygons  
egen disttest = min(dist), by(adressid)																	/*Generating the minimum distance between all the assignation processes and polygons for each address of location*/
gen dup = dist - disttest																				/*Identifying adressid that has more than one assignation*/
drop if dup > 0   																						/*we keep the assignation for which dup=0 and we drop the others*/
duplicates drop adressid, force																			/*We drop duplicates if any still exist*/
order _all, alphabetic																					/*Ordering variables to avoid confusion during the appending processes*/
drop dup* accuracy* corridors disttest																	/*We drop useless variables*/


// Correction some variables names and generating some other useful variables
gen prov_stateh=substr(state_id,1,2)
replace prov_stateh=upper(prov_stateh)

//Generate a variable that tells us which assignments give us the final result
gen assignedby = "border" if idparcel == polyidsup															
replace assignedby = "centroid" if idparcel == centidsup
replace assignedby = "within" if idparcel == withidsup
replace assignedby="centroid-border" if idparcel == centidsup & idparcel == polyidsup &idparcel != withidsup
replace assignedby="within-border" if idparcel == withidsup & idparcel == polyidsup &idparcel != centidsup
replace assignedby="within-center" if idparcel == withidsup & idparcel == centidsup &idparcel != polyidsup
replace assignedby="centroid-border-within" if idparcel == centidsup & idparcel == polyidsup & idparcel == withidsup

//Renaming some variables
ren dist disth
ren idparcel idparcelh
ren polygon polygonh
ren state_id state_idh
ren assignedby assignedbyh

tostring idparcel*, replace
replace idparcelh=subinstr(idparcelh, " ","",.)
replace idparcelh=subinstr(idparcelh, " ","",.)
replace idparcelh=subinstr(idparcelh, " ","",.)
replace idparcelh=subinstr(idparcelh, ".","",.)

replace FB_area_c=. if FB_area_c==0
replace FB_area_d=. if FB_area_d==0

save "$outpath/scotts_heights.dta", replace



// -------------------------------------------------------------------
// STEP 4 : processing parcels joined with the registry file of Quebec
// -------------------------------------------------------------------


use "$datapath/registry_qc_on_parcels.dta", clear

scalar alpha=75  /*threshold distance to consider assignations*/
scalar beta=10  /*Maximun number of neighbors for a polygon not to be considered as corridor or road*/

* a. Replacing 0 values by missing
replace witharea_d=. if witharea_d==0
replace centarea_d=. if centarea_d==0
replace polyarea_d=. if polyarea_d==0

* b. Choosing between the three options of spatial join : within, poly and Centro
	   *For calculated values : Those computed from the polygons using GIS tools
gen corridors=(accuracy!="ROOFTOP" & withneighb!=.) | (withneighb>beta & withneighb!=.)   				/*Identifying corridors : not a rooftop precision with within assignation OR within assignation on a polygon withnmore than 10 neighbors*/
replace corridors=. if witharea_c==.																	/*Removing areas' assignations from corridors*/
gen P_area_c=witharea_c if withneighb!=.																/*Generating the raw area variable : Giving premium to within method*/
replace P_area_c=. if corridors==1									 									/*Removing areas' assignations from corridors*/
replace P_area_c=centarea_c if P_area_c==. & centdist<polydist & centneighb<=beta & centdist<=alpha		/*taking the surface from the method yielding the lowest between the polygon and plant*/
replace P_area_c=polyarea_c if P_area_c==. & centdist>=polydist & polyneighb<=beta & polydist<=alpha	/*taking the surface from the method yielding the lowest between the polygon and plant*/
replace P_area_c=centarea_c if P_area_c==. & centneighb<beta & centdist<=alpha							/*Assigning measures for remaining missings*/
replace P_area_c=polyarea_c if P_area_c==. & polyneighb<beta	& polydist<=alpha						/*Assigning measures for remaining missings*/

		*For declared values : Those found in the datasets
gen P_area_d=witharea_d if withneighb!=.																/*Generating the raw area variable : Giving premium to within method*/
replace P_area_d=. if corridors==1									 									/*Removing areas' assignations from corridors*/
replace P_area_d=centarea_d if P_area_d==. & centdist<polydist	& centneighb<beta & centdist<=alpha		/*taking the surface from the method yielding the lowest between the polygon and plant*/
replace P_area_d=polyarea_d if P_area_d==. & centdist>=polydist	& polyneighb<beta & polydist<=alpha		/*taking the surface from the method yielding the lowest between the polygon and plant*/
replace P_area_d=centarea_c if P_area_d==. & centneighb<beta & centdist<=alpha							/*Assigning measures for remaining missings*/				
replace P_area_d=polyarea_c if P_area_d==. & polyneighb<beta & polydist<=alpha							/*Assigning measures for remaining missings*/
replace P_area_d=P_area_d*(-1) if P_area_d<0

        *Creating the parcel identifier coherent with the final assignations*
gen idparcel=withidsup if withneighb!=.																	/*Generating the identifier variable : Giving premium to within method*/
replace idparcel="" if corridors==1																		/*Removing idsup' assignations from corridors*/
replace idparcel=centidsup if idparcel=="" & centdist<polydist & centneighb<beta & centdist<=alpha	 	/*taking the idsup from the method yielding the lowest between the polygon and plant*/
replace idparcel=polyidsup if idparcel=="" & centdist>=polydist & polyneighb<beta & polydist<=alpha		/*taking the idsup from the method yielding the lowest between the polygon and plant*/
replace idparcel=centidsup if idparcel=="" & centneighb<beta & centdist<=alpha	 						/*Assigning idsup for remaining missings*/
replace idparcel=polyidsup if idparcel=="" & polyneighb<beta & polydist<=alpha							/*Assigning idsup for remaining missings*/

        *Creating the neighbors coherent with the final assignations*
gen neighbors=withneighb if withneighb!=.																/*Generating the neighbor variable : Giving premium to within method*/
replace neighbors=. if corridors==1	 																	/*Removing neighbors assignations from corridors*/
replace neighbors=centneighb if neighbors==. & centdist<polydist & centneighb<beta & centdist<=alpha	/*taking the neighb from the method yielding the lowest between the polygon and plant*/
replace neighbors=polyneighb if neighbors==. & centdist>=polydist & polyneighb<beta & polydist<=alpha	/*taking the neighb from the method yielding the lowest between the polygon and plant*/
replace neighbors=centneighb if neighbors==. & centneighb<beta & centdist<=alpha						/*Assigning neighb for remaining missings*/
replace neighbors=polyneighb if neighbors==. & polyneighb<beta & polydist<=alpha						/*Assigning neighb for remaining missings*/

		*Creating the distance coherent with the final assignations*
gen dist=0 if withneighb!=.																				/*Generating the distance variable : Giving premium to within method*/
replace dist=. if corridors==1																			/*Removing dist assignations from corridors*/
replace dist=centdist if dist==. & centdist<polydist & centneighb<beta & centdist<=alpha				/*taking the dist from the method yielding the lowest between the polygon and plant*/
replace dist=polydist if dist==. & centdist>=polydist & polyneighb<beta & polydist<=alpha				/*taking the dist from the method yielding the lowest between the polygon and plant*/
replace dist=centdist if dist==. & centneighb<beta & centdist<=alpha									/*Assigning dist for remaining missings*/
replace dist=polydist if dist==. & polyneighb<beta & polydist<=alpha									/*Assigning dist for remaining missings*/

		*Choosing one assignation for plants with more than 1 assignation due to their location at the frontier of two or more polygons  
egen disttest=min(dist), by(adressid)																	/*Generating the minimum distance between all the assignation processes and polygons for each address of location*/
gen dup=dist-disttest																					/*Identifying adressid that has more than one assignation*/
drop if dup>0   																						/*we keep the assignation for which dup=0 and we drop the others*/
duplicates drop adressid, force																			/*We drop duplicates if any still exist*/
order _all, alphabetic																					/*Ordering variables to avoid confusion during the appending processes*/
drop dup* accuracy* corridors disttest																	/*We drop useless variables*/

	*c. Correction some variables names and generating some other useful variables
replace state_id=subinstr(state_id, "_", "",.)															/*Correcting the names of polygons from with the area measures come from*/
gen prov_statep=substr(state_id,1,2)																	/*Generating a variable indicating the province to which the parcels polygon is related to*/
replace prov_statep=upper(prov_statep)																	/*Transforming province names in uppercase*/
replace prov_statep="MB" if prov_statep=="MA"															/*Hamonizing the short name of Manitoba with what already exists in the database*/
replace prov_statep="SK" if prov_statep=="SA"															/*Hamonizing the short name of Saskatchewan with what already exists in the database*/

		*Generating the assignation option variable
gen assignedby="border" if idparcel==polyidsup															
replace assignedby="centroid" if idparcel==centidsup
replace assignedby="within" if idparcel==withidsup
replace assignedby="within-border" if idparcel==withidsup & idparcel== polyidsup &idparcel!= centidsup
replace assignedby="within-center" if idparcel==withidsup & idparcel== centidsup &idparcel!= polyidsup
replace assignedby="center-border" if idparcel==centidsup & idparcel== polyidsup &idparcel!= withidsup
replace assignedby="center-border-within" if idparcel==centidsup & idparcel== polyidsup & idparcel== withidsup

*renaming some variables
ren dist distp
ren idparcel idparcelp
ren polygon polygonp
ren state_id state_idp
ren assignedby assignedbyp

tostring idparcel*,replace
replace idparcelp=subinstr(idparcelp, " ","",.)
replace idparcelp=subinstr(idparcelp, " ","",.)
replace idparcelp=subinstr(idparcelp, " ","",.)
replace idparcelp=subinstr(idparcelp, ".","",.)

replace P_area_c=. if P_area_c==0
replace P_area_d=. if P_area_d==0
save "$outpath/registry_qc_on_parcels.dta", replace

ren adressid addressid
keep addressid idparcel state_idp prov_statep lat lon
merge 1:m addressid using "$datapath/registry_qc_data.dta"
keep if _merge==3
drop _merge
ren Address address_reg
gen number_NEQ=_n
collapse (count) number_NEQ , by(idparcelp state_idp prov_statep)

merge 1:m idparcelp state_idp prov_statep using "$outpath/scotts_parcels.dta"

drop if adressid==.
drop _merge
save "$outpath/scotts_parcels.dta", replace


